Load data

XXX This needs data not included in the git release to be run XXX

load('../RData/kemri_case_data.RData')
load('Outputs/Kenya_dat_PSM.RData')
dat_kenya = cbind(kemri_case_data, dat_kenya)

Summaries

Summarise the top and bottom 20th percentiles

upper_p = round(quantile(dat_kenya$P_SMs, probs=.8),3)
print(upper_p)
##   80% 
## 0.924
lower_p = round(quantile(dat_kenya$P_SMs, probs=.2),3)
print(lower_p)
##   20% 
## 0.183
top_ind = dat_kenya$P_SMs>upper_p
bottom_ind = dat_kenya$P_SMs<lower_p

sum(top_ind)
## [1] 446
sum(bottom_ind)
## [1] 444
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% have sickle trait versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$HbAS[top_ind],na.rm = T),1),
                   round(100*mean(dat_kenya$HbAS[bottom_ind],na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 0.7% have sickle trait versus 4.8% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% died versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$died[top_ind],na.rm = T),1),
                   round(100*mean(dat_kenya$died[bottom_ind],na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 6.1% died versus 18.8% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% were blood culture + versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$bloodculture_pos[top_ind],na.rm = T),1),
                   round(100*mean(dat_kenya$bloodculture_pos[bottom_ind],na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 1.8% were blood culture + versus 4.7% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% has Hb<5 versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$severe_malaria_anaemia[top_ind],na.rm = T),1),
                   round(100*mean(dat_kenya$severe_malaria_anaemia[bottom_ind],na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 27.4% has Hb<5 versus 34.9% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% had fewer than 1,000 parasites per uL versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$log_parasites[top_ind]<3,na.rm = T),1),
                   round(100*mean(dat_kenya$log_parasites[bottom_ind]<3,na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 7.4% had fewer than 1,000 parasites per uL versus 13.7% for patients with P(SM|Data)< 0.183
writeLines(sprintf('For patients with P(SM|Data)>%s, %s%% had more than 100,000 parasites per uL versus %s%% for patients with P(SM|Data)< %s',
                   upper_p,
                   round(100*mean(dat_kenya$log_parasites[top_ind]>5,na.rm = T),1),
                   round(100*mean(dat_kenya$log_parasites[bottom_ind]>5,na.rm = T),1),
                   lower_p))
## For patients with P(SM|Data)>0.924, 57.4% had more than 100,000 parasites per uL versus 28.5% for patients with P(SM|Data)< 0.183

Some extra bits

myx = dat_kenya$P_SMs
xs = seq(0,1,length.out = 100)[-1]
##******** BLOOD CULTURE + ****************
par(mfrow=c(1,1))
mod = gam(BCP ~ s(x,k=4), family='binomial',
          data=data.frame(x=myx,
                          BCP=dat_kenya$bloodculture_pos))

summary(mod)
## 
## Family: binomial 
## Link function: logit 
## 
## Formula:
## BCP ~ s(x, k = 4)
## 
## Parametric coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -3.3651     0.1211   -27.8   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Approximate significance of smooth terms:
##        edf Ref.df Chi.sq p-value   
## s(x) 1.001  1.002  8.293   0.004 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## R-sq.(adj) =  0.00338   Deviance explained = 1.25%
## UBRE = -0.69768  Scale est. = 1         n = 2220
preds = predict(mod, se.fit = T, newdata = data.frame(x=xs))
ys = 100*c(inv.logit(preds$fit+1.96*preds$se.fit),
           rev(inv.logit(preds$fit-1.96*preds$se.fit)))
plot(xs, 100*inv.logit(preds$fit),type='l',lwd=3,
     xlim = c(0,1),ylim = range(c(0,ys)),
     xlab='P(Severe malaria | Data)',
     ylab='Blood culture positive (%)')
polygon(c(xs, rev(xs)),ys, panel.first=grid(),
        border = NA, col = adjustcolor('grey',.3))
mtext(text = '', side = 3, adj = 0, cex = 1.5)
abline(h = 100*mean(dat_kenya$bloodculture_pos),lty=2, lwd=2)

Explore anaemia a bit more

par(mfrow=c(2,2), las=1, bty='n')
layout(mat = matrix(data = c(1,1,2,3),nrow = 2, byrow = T))
plot(dat_kenya$P_SMs, dat_kenya$hb, 
     xlim=c(0,1),panel.first=grid(),
     pch=20, xlab='P(Severe malaria | Data)',ylab='Haemoglobin (g/dL)')
abline(h=5,v=c(upper_p, lower_p), lty=2,lwd=3, col='red')

hist(dat_kenya$hb[bottom_ind], xlim=c(0,15),
     breaks = 0:19, main = paste('P(Severe malaria | Data)',lower_p,sep='<'), 
     xlab='Hb (g/dL)')
abline(v=5, lty=2,lwd=3, col='red')
hist(dat_kenya$hb[top_ind], xlim=c(0,15),
     breaks = 0:19, main = paste('P(Severe malaria | Data)',upper_p,sep='>'), 
     xlab='Hb (g/dL)')
abline(v=5, lty=2,lwd=3, col='red')

f=colorRampPalette(colors = RColorBrewer::brewer.pal(name = 'RdYlBu',n = 11))
n_levels = 11
mycuts = cut(x = dat_kenya$P_SMs, breaks = seq(0,1,by=1/n_levels))
cols_ind = as.numeric(mycuts)
mycols = f(n_levels)

par(las=1, mfrow=c(1,1), bty='n', family='serif', 
    cex.lab=1.5, cex.axis=1.5, mar = c(5,6,2,2))
my_fade = 0.6

## Sickle trait and thal homozygous
plot(log10(dat_kenya$platelet), log10(dat_kenya$wbc),
     xaxt='n', yaxt='n', xlab='',
     ylab='', pch=16, panel.first = grid(),
     col = adjustcolor(mycols[cols_ind], my_fade))
mtext(side = 2, text = 'White count (x1000 per uL)', 
      cex=1.5, line = 3, las = 3)
mtext(side = 1, text = 'Platelet count (x1000 per uL)', 
      cex=1.5, line = 3)
axis(side = 1, at = 1:3, labels = 10^(1:3))
axis(side = 2, at = 0:2, labels = 10^(0:2))
axis(side = 1, at = log10(c(seq(10,100,by=10), seq(100,1000,by=100))), 
     tick = T, labels = F)
axis(side = 2, at = log10(c(seq(1,10,by=1), seq(10,100,by=10))), 
     tick = T, labels = F)
ind_double = dat_kenya$HbAS==1 & dat_kenya$thal==3
ind_single = dat_kenya$HbAS==1 & dat_kenya$thal<3

# points(log10(dat_kenya$platelet[ind_double]),
#        log10(dat_kenya$wbc[ind_double]),
#        col='black',pch=18,cex=1.8)
# legend('bottomright', pch = 18, col = c('black'),
#        inset = 0, cex=1.3,bty='n',
#        legend = c('HbAS & HZ-alpha-thal'))
lgd_ = rep(NA, n_levels)
lgd_[c(1,6,11)] = c(0,0.5,1)
legend('topleft',
       legend = lgd_,
       fill = mycols,bty='n',x.intersp =.5,
       border = NA,inset = 0.03,
       y.intersp = 0.5,title = 'P(SM | Data)\n',
       cex = 1, text.font = 1)

## Bacteremia
plot(log10(dat_kenya$platelet), log10(dat_kenya$wbc),
     xaxt='n', yaxt='n', xlab='',pch=16, ylab='', 
     col = adjustcolor(mycols[cols_ind], my_fade))
axis(side = 1, at = 1:3, labels = 10^(1:3))
axis(side = 2, at = 0:2, labels = 10^(0:2))
axis(side = 1, at = log10(c(seq(10,100,by=10), seq(100,1000,by=100))), 
     tick = T, labels = F)
axis(side = 2, at = log10(c(seq(1,10,by=1), seq(10,100,by=10))), 
     tick = T, labels = F)
mtext(side = 2, text = 'White count (x1000 per uL)', 
      cex=1.5, line = 3, las = 3)
mtext(side = 1, text = 'Platelet count (x1000 per uL)', 
      cex=1.5, line = 3)
ind_blood_cult = dat_kenya$bloodculture_pos==1

points(log10(dat_kenya$platelet[ind_blood_cult]),
       log10(dat_kenya$wbc[ind_blood_cult]),
       col='black',pch=18,cex=1.5)

ind_salmonella = grep(pattern = 'salmonella', 
                      x = dat_kenya$bacteria,ignore.case = T)
points(log10(dat_kenya$platelet[ind_salmonella]),
       log10(dat_kenya$wbc[ind_salmonella]),
       col='black',pch=1,cex=1.5)

legend('bottomright', pch = c(18,1), col = c('black'),
       inset = 0, cex=1.3, bty='n',
       legend = c('Bacteremia','NTS'))

all(kemri_case_data$sample_code==dat_kenya$sample_code)
## [1] TRUE
par(mfrow=c(2,1))
ind1 = (kemri_case_data$severe_malaria_anaemia==1)
hist(dat_kenya$P_SMs[ind1], breaks = 40, xlab='P(SM)', main='SMA')

ind1 = (kemri_case_data$cerebral_malaria==1)
hist(dat_kenya$P_SMs[ind1], breaks = 40, xlab='P(SM)', main='CM')